##### This script loads the European Social Survey individual-level data and prepares it for analysis (recoding of variables and selection of countries)

### The following R version was used when preparing the replication files
# R version 4.3.1 (2023-06-16 ucrt) -- "Beagle Scouts"

# Setting the working directory to the "replication" folder (the folder stores all the data sets used in the article)
# The path below exists only on the original author's machine. When it is not available, the current
# working directory is kept, so the script can also be started from inside any copy of the "replication" folder.
replication_dir <- "C:/Users/ivan.petrusek/Desktop/sreview/replication"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
} else {
  message("Directory '", replication_dir, "' not found; reading the data from the current working directory: ", getwd())
}
# The helper variable is only needed for the check above
rm(replication_dir)

### Loading the reduced version of the SPSS file
# DATA: European Social Survey: ESS1-9, edition 1.0
# 422985 respondents and 22 variables
library(haven)
# user_na = FALSE: values declared as user-defined missing in the SPSS file are converted to NA
ess <- read_sav("ESS1-9e01_1_reduced.sav", user_na = FALSE)
# Quick checks of the imported object: its class and its dimensions (rows, columns)
class(ess)
dim(ess)


##### OUTCOME VARIABLE: Redistribution support
# The source item gincdif is coded 1 = agree strongly ... 5 = disagree strongly.
# Subtracting it from 5 flips the direction of the scale, so that higher values of "red_sup"
# stand for stronger agreement (1 becomes 4, 5 becomes 0).
ess[["red_sup"]] <- 5 - ess[["gincdif"]]
# Frequency distribution of the reversed scale, followed by the number of missing values
table(ess[["red_sup"]])
table(is.na(ess[["red_sup"]]))


##### Economic activity
## Value codes of the original mnactic variable
# 1 Paid work
# 2 Education
# 3 Unemployed, looking for job
# 4 Unemployed, not looking for job
# 5 Permanently sick or disabled
# 6 Retired
# 7 Community or military service
# 8 Housework, looking after children, others
# 9 Other

# A: unemployed dummy variable
# 1 = codes 3 or 4 (unemployed, whether or not looking for a job), 0 = any other code.
# A missing mnactic makes the comparison NA, so the dummy is NA for those respondents.
ess[["unemployed"]] <- ifelse(ess[["mnactic"]] == 3 | ess[["mnactic"]] == 4, yes = 1, no = 0)
# Checks: distribution of the dummy, cross-tabulation against the source codes, number of missing values
table(ess[["unemployed"]])
table(ess[["mnactic"]], ess[["unemployed"]])
table(is.na(ess[["unemployed"]]))

# B: employed dummy variable
# 1 = codes 1 or 7 (paid work, or community or military service), 0 = any other code.
# Code 7 is counted as employed following the ILO note:
# "(9) Members of the armed forces should be included among persons in paid employment.
# The armed forces should include both the regular and the temporary members as specified in the most recent revision of the International Standard Classification of Occupations (ISCO)."
# Source: https://www.ilo.org/ilostat-files/SSM/SSM5/E/ANNEX.html
ess[["employed"]] <- ifelse(ess[["mnactic"]] == 1 | ess[["mnactic"]] == 7, yes = 1, no = 0)
# Checks: cross-tabulation against the source codes and number of missing values
table(ess[["mnactic"]], ess[["employed"]])
table(is.na(ess[["employed"]]))


##### Female
## Value codes of the original gndr variable
# 1 Male
# 2 Female
# Start from an all-missing column; only respondents coded 1 or 2 on gndr receive a value,
# everybody else stays NA.
ess[["female"]] <- NA
ess[["female"]][ess[["gndr"]] == 2] <- 1
ess[["female"]][ess[["gndr"]] == 1] <- 0
# Checks: distribution of the dummy and number of missing values
table(ess[["female"]])
table(is.na(ess[["female"]]))


##### Age: respondents younger than 15 years (the minimum age for ESS participation is 15) and respondents older than 99 years (a few cases with extreme values) are excluded
# Distribution of the original agea variable
table(ess[["agea"]])
# Keep agea only when it is observed and lies strictly between 14 and 100; every other case becomes NA
ess[["age"]] <- ifelse(!is.na(ess[["agea"]]) & ess[["agea"]] > 14 & ess[["agea"]] < 100,
                       yes = ess[["agea"]], no = NA)
# Checks: distribution of the cleaned variable and number of missing values
table(ess[["age"]])
table(is.na(ess[["age"]]))


##### Education attainment
## Value codes of the original edulvla variable
# 1 Less than lower secondary education (ISCED 0-1)
# 2 Lower secondary education completed (ISCED 2)
# 3 Upper secondary education completed (ISCED 3)
# 4 Post-secondary non-tertiary education completed (ISCED 4)
# 5 Tertiary education completed (ISCED 5-6)
# 55 Other
table(ess[["edulvla"]])

# Four 0/1 dummies are built, one for each of the codes 2, 3, 4 and 5.
# No dummy is created for code 1, so it acts as the baseline (all four dummies equal 0).
# NOTE(review): code 55 ("Other") also gets 0 on all four dummies - confirm this is intended.
# A missing edulvla gives NA on every dummy.

# isced 2 = Lower secondary education completed (ISCED 2)
ess[["isced2"]] <- ifelse(ess[["edulvla"]] == 2, yes = 1, no = 0)
table(ess[["isced2"]])
table(is.na(ess[["isced2"]]))

# isced 3 = Upper secondary education completed (ISCED 3)
ess[["isced3"]] <- ifelse(ess[["edulvla"]] == 3, yes = 1, no = 0)
table(ess[["isced3"]])
table(is.na(ess[["isced3"]]))

# isced 4 = Post-secondary non-tertiary education completed (ISCED 4)
ess[["isced4"]] <- ifelse(ess[["edulvla"]] == 4, yes = 1, no = 0)
table(ess[["isced4"]])
table(is.na(ess[["isced4"]]))

# isced 56 = Tertiary education completed (ISCED 5-6)
ess[["isced56"]] <- ifelse(ess[["edulvla"]] == 5, yes = 1, no = 0)
table(ess[["isced56"]])
table(is.na(ess[["isced56"]]))


##### Creating the country-round identifier
# The country code, the round number and an empty string are joined with a single space,
# so every identifier ends with a trailing space (e.g. "AT 1 ").
# NOTE(review): confirm that any data set matched on cntry_round uses the same format.
ess[["cntry_round"]] <- paste(ess[["cntry"]], ess[["essround"]], "", sep = " ")
# Number of respondents per country-round
table(ess[["cntry_round"]])
length(table(ess[["cntry_round"]])) # 223 country-rounds



##### Checking which ESS rounds are available within countries
table(ess$cntry, ess$essround)

##### Selecting countries with at least FOUR ESS rounds
### The subset of the countries is stored into the ess_4 data.frame
### 9 or 8 or 7 or 6 or 5 or 4 rounds: 27 countries, 204 country-rounds
# Russia and Ukraine are not included (since the contextual variables are not available for these two countries)
# %in% is used for the membership test: unlike a chain of `==` comparisons it never returns NA,
# so a respondent with a missing country code is dropped instead of producing an all-NA row.
ess_4 <- ess[as.character(ess$cntry) %in% c("AT", "BE", "BG", "CY",
                                            "CZ", "DE", "DK", "EE",
                                            "ES", "FI", "FR", "GB",
                                            "GR", "HU", "CH", "IE",
                                            "IL", "IS", "IT", "LT",
                                            "NL", "NO", "PL", "PT",
                                            "SE", "SI", "SK"), ]
# Number of respondents per selected country
table(ess_4$cntry)
# 27 countries (as required)
length(table(ess_4$cntry))
# Rounds available within the selected countries, and the size of the subset
table(ess_4$cntry, ess_4$essround)
dim(ess_4)

# 204 country-rounds
length(table(ess_4$cntry_round))

# Deleting the original full ESS data.frame
rm(ess)
